1 Data input, cleaning and pre-processing

Load protein abundance data, pre-process them into a format suitable for network analysis, and clean the data by removing obvious outlier samples as well as proteins and samples with excessive numbers of missing entries.

1.a Loading expression data

The expression data is contained in the files:

  • 1-9_A1_A2_A3__precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_A1vsA2vsA3.xlsx that comes with this tutorial.
  • 10-14_C1_C2_C3__precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_C1vsC2vsC3.xlsx

These files contains several quality parametres. We have used grouped protein abundance to compare samples.

Files not share same proteins, this is one important point to discuss. One aporximation is consider missing protein with 0 abundance. Also it can be considered only the shared proetins.

library(WGCNA)
library(readxl)
library(clusterProfiler)
library(dplyr)

library(sva)
library(EnhancedVolcano)
library(limma)
library(ggVennDiagram)
library(org.Hs.eg.db)
library(DT)
library(igraph)
library(WGCNA)
datatable_jm<-function(x,column=NULL){
  
  # if(is.na(column)){column<-0}

datatable(
  x,
  extensions = 'Buttons', 
  filter = list(
    position = 'top', clear = T
  ),
  options = list(dom = 'Blfrtip',buttons = list(list(extend = 'colvis')),
                 buttons = list('copy', 'print',
                                list(extend = 'collection',
                                     buttons = c('csv', 'excel', 'pdf'),
                                     text = 'Download')),
                 columnDefs = list(list(visible=FALSE, targets=column))))}
data_A<-read_excel("/home/josep/Baixades/Resultats proteòmica(1)/ANDREU/1-9_A1_A2_A3__precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_A1vsA2vsA3.xlsx")
data_C<-read_excel("/home/josep/Baixades/Resultats proteòmica(1)/ANDREU/10-14_C1_C2_C3__precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_C1vsC2vsC3.xlsx")
data_AC<-read_excel("/home/josep/Baixades/Resultats proteòmica(1)/ANDREU/1-14_A1_A2_A3_C1_C2_C3_precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_AvsC.xlsx")
pheno<-read_excel("./nomenclatura.xlsx")

## comparacio exceles ####


data_C_abundance<-data_C[,grep("Abundances [(]Grouped[)]:",colnames(data_C),ignore.case = T)]
data_C_abundance<-data.frame(data_C_abundance)
colnames(data_C_abundance)<-pheno$`Nomenclatura unificada`[4:6]
rownames(data_C_abundance)<-data_C$Accession

data_A_abundance<-data_A[,grep("Abundances [(]Grouped[)]:",colnames(data_A),ignore.case = T)]
data_A_abundance<-data.frame(data_A_abundance)
rownames(data_A_abundance)<-data_A$Accession


colnames(data_A_abundance)<-pheno$`Nomenclatura unificada`[1:3]


# Ven diagramm de comuns ####

# Matriu amb A i C junts ####
data_abundance<-merge(data_C_abundance,data_A_abundance,by="row.names",all=T)
data_abundance_0<-data_abundance

data_abundance_0[is.na(data_abundance_0)] <- 0
dir.create("./DATA_IN")
dir.create("./RESULTATS")
dir.create("./RESULTATS/OBJECTES")
save(data_abundance_0,file ="./RESULTATS/OBJECTES/data_abundance_0" )
save(data_abundance,file ="./RESULTATS/OBJECTES/data_abundance" )
colnames(data_abundance)
## [1] "Row.names" "aP-EV1"    "aP-EV2"    "aP-EV3"    "PL-EV1"    "PL-EV2"   
## [7] "PL-EV3"
# Load the WGCNA package

# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
#Read in the female liver data set
pheno<-read_excel("./nomenclatura.xlsx")
# Take a quick look at what is in the data set:


load("RESULTATS/OBJECTES/data_abundance_0")
datExpr0 = as.data.frame(data_abundance_0)
rownames(datExpr0) = datExpr0$Row.names
datExpr0<-datExpr0[,-1]
datExpr0<-t(datExpr0)

The expression data set contains 6 samples. Note that each row corresponds to a protein and column to a sample.

1.b Checking data for excessive missing values and identification of outlier microarray samples

We first check for proteins and samples with too many missing values:

gsg = goodSamplesGenes(datExpr0, verbose = 3);
##  Flagging genes and samples with too many missing values...
##   ..step 1
##   ..Excluding 16 genes from the calculation due to too many missing samples or zero variance.
##   ..step 2
gsg$allOK
## [1] FALSE
if (!gsg$allOK)
{
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(colnames(datExpr0)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
}
## Removing genes: P08670, P0CG39, P0DPH8
## P0DPH7, P11217, P12036, P14136, P42229, P49593, P62745, Q12840, Q5T0T0, Q5TID7, Q6A162, Q8TE73, Q9P2T1, Q9Y4G6

Next we cluster the samples (in contrast to clustering proteins that will come later) to see if there are any obvious outliers.

sampleTree = hclust(dist(datExpr0), method = "average");
# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.
# sizeGrWindow(12,9)
#pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)

1.c Loading clinical trait data

pheno<-read_excel("./nomenclatura.xlsx")


pheno$grup<-as.factor(c("NaCl","NaCl","NaCl","PR","PR","PR"))
# remove columns that hold information we do not need.
allTraits = data.frame(pheno)

# Form a data frame analogous to expression data that will hold the clinical traits.
nameSamples = rownames(datExpr0);
traitRows = match(nameSamples, allTraits$Nomenclatura.unificada);
datTraits = allTraits[traitRows,];
rownames(datTraits) = allTraits[traitRows, 1];
collectGarbage()

We now have the expression data in the variable datExpr, and the corresponding clinical traits in the variable datTraits.

Sample dendrogram

Before we continue with network construction and module detection, we visualize how the clinical traits relate to the sample dendrogram

# Re-cluster samples
sampleTree2 = hclust(dist(datExpr0), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry


traitColors = numbers2colors(as.numeric(datTraits$grup), signed = FALSE);
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")

Batch effect?

Groups are very different (between other things, are not share proteins).

Data are re-normalized with combat.

datExpr0<-
ComBat(
  t(datExpr0),batch = c(rep("ap",3),rep("PL",3)),
  mod = NULL,
  par.prior = TRUE,
  prior.plots = FALSE,
  mean.only = FALSE,
  ref.batch = NULL,
  BPPARAM = bpparam("SerialParam")
)
## Found 726 genes with uniform expression within a single batch (all zeros); these will not be adjusted for batch.
datExpr0<-t(datExpr0)

Sample dendrogram

Before we continue with network construction and module detection, we visualize how the clinical traits relate to the sample dendrogram

# Re-cluster samples
sampleTree2 = hclust(dist(datExpr0), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry


traitColors = numbers2colors(as.numeric(datTraits$grup), signed = FALSE);
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")

2.1 Automatic network construction and module detection

2.a.1 Choosing the soft-thresholding power: analysis of network topology

Constructing a weighted gene network entails the choice of the soft thresholding power β to which co-expression similarity is raised to calculate adjacency [1]. The authors of [1] have proposed to choose the soft thresholding power based on the criterion of approximate scale-free topology. We refer the reader to that work for more details; here we illustrate the use of the function pickSoftThreshold that performs the analysis of network topology and aids the user in choosing a proper soft-thresholding power.

# Choose a set of soft-thresholding powers
powers = c(c(1:50), seq(from = 52, to=100, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr0, 
                         networkType = "signed hybrid",
                        powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 1154.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 1154 of 1154
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1  0.33900  0.479         0.2080   353.0    366.00  561.0
## 2      2  0.04390  0.130        -0.1220   247.0    241.00  441.0
## 3      3  0.00942 -0.051        -0.0673   192.0    180.00  370.0
## 4      4  0.09780 -0.152         0.1150   158.0    147.00  325.0
## 5      5  0.27000 -0.237         0.4280   136.0    123.00  292.0
## 6      6  0.39700 -0.285         0.6030   119.0    107.00  266.0
## 7      7  0.49100 -0.335         0.7230   106.0     95.00  245.0
## 8      8  0.54600 -0.366         0.7150    96.2     85.20  229.0
## 9      9  0.62100 -0.389         0.7630    88.1     78.00  215.0
## 10    10  0.63700 -0.403         0.7490    81.3     72.20  203.0
## 11    11  0.70400 -0.422         0.7740    75.6     66.50  192.0
## 12    12  0.69100 -0.436         0.7470    70.8     61.60  183.0
## 13    13  0.69100 -0.450         0.7120    66.5     57.20  175.0
## 14    14  0.69800 -0.457         0.7190    62.8     53.80  167.0
## 15    15  0.68000 -0.467         0.6820    59.5     50.50  160.0
## 16    16  0.70000 -0.478         0.6910    56.6     47.70  154.0
## 17    17  0.70700 -0.489         0.6890    54.0     45.10  148.0
## 18    18  0.72800 -0.492         0.7070    51.6     42.90  143.0
## 19    19  0.72400 -0.496         0.7050    49.4     41.30  138.0
## 20    20  0.72900 -0.501         0.7080    47.5     39.40  133.0
## 21    21  0.74500 -0.503         0.7250    45.7     37.80  129.0
## 22    22  0.75700 -0.505         0.7340    44.0     36.10  125.0
## 23    23  0.78200 -0.507         0.7550    42.5     34.60  122.0
## 24    24  0.78400 -0.509         0.7530    41.0     33.60  119.0
## 25    25  0.79300 -0.513         0.7600    39.7     32.90  116.0
## 26    26  0.77100 -0.516         0.7340    38.5     32.00  113.0
## 27    27  0.77000 -0.518         0.7280    37.3     31.20  110.0
## 28    28  0.75800 -0.520         0.7090    36.2     30.50  108.0
## 29    29  0.75000 -0.524         0.6950    35.2     29.70  105.0
## 30    30  0.73500 -0.529         0.6750    34.3     29.00  103.0
## 31    31  0.72700 -0.539         0.6640    33.4     28.40  101.0
## 32    32  0.73400 -0.542         0.6730    32.5     27.90   98.6
## 33    33  0.72400 -0.546         0.6570    31.7     27.40   96.6
## 34    34  0.70900 -0.548         0.6350    30.9     26.80   94.7
## 35    35  0.71100 -0.550         0.6370    30.2     26.10   93.0
## 36    36  0.69900 -0.554         0.6210    29.5     25.60   91.3
## 37    37  0.71700 -0.557         0.6420    28.9     24.90   89.7
## 38    38  0.70700 -0.558         0.6300    28.2     24.30   88.2
## 39    39  0.69900 -0.561         0.6190    27.6     23.80   86.7
## 40    40  0.71000 -0.564         0.6320    27.1     23.40   85.3
## 41    41  0.71200 -0.569         0.6330    26.5     23.00   83.9
## 42    42  0.72200 -0.568         0.6450    26.0     22.40   82.6
## 43    43  0.71700 -0.569         0.6390    25.5     21.90   81.3
## 44    44  0.73700 -0.571         0.6640    25.0     21.40   80.1
## 45    45  0.75000 -0.571         0.6800    24.5     21.00   78.9
## 46    46  0.75300 -0.572         0.6830    24.1     20.50   77.8
## 47    47  0.75000 -0.575         0.6790    23.7     20.10   76.8
## 48    48  0.74400 -0.580         0.6710    23.2     19.70   75.7
## 49    49  0.75100 -0.586         0.6800    22.8     19.40   74.8
## 50    50  0.75700 -0.592         0.6880    22.5     19.20   73.8
## 51    52  0.75400 -0.601         0.6850    21.7     18.40   72.0
## 52    54  0.75500 -0.605         0.6870    21.0     17.60   70.3
## 53    56  0.74700 -0.615         0.6780    20.4     16.90   68.7
## 54    58  0.74200 -0.620         0.6750    19.8     16.30   67.1
## 55    60  0.75500 -0.627         0.6930    19.2     15.80   65.7
## 56    62  0.75400 -0.633         0.6950    18.7     15.30   64.3
## 57    64  0.76500 -0.639         0.7100    18.2     14.80   63.0
## 58    66  0.76300 -0.643         0.7050    17.7     14.40   61.7
## 59    68  0.76400 -0.644         0.7070    17.3     14.00   60.5
## 60    70  0.75300 -0.650         0.6940    16.9     13.60   59.3
## 61    72  0.76400 -0.652         0.7100    16.5     13.20   58.2
## 62    74  0.77000 -0.654         0.7200    16.1     12.80   57.2
## 63    76  0.76500 -0.659         0.7140    15.7     12.50   56.2
## 64    78  0.77200 -0.665         0.7270    15.4     12.20   55.2
## 65    80  0.78000 -0.668         0.7400    15.0     11.90   54.3
## 66    82  0.78500 -0.668         0.7490    14.7     11.60   53.4
## 67    84  0.78900 -0.670         0.7570    14.4     11.30   52.6
## 68    86  0.78800 -0.673         0.7560    14.1     11.00   51.8
## 69    88  0.79900 -0.675         0.7740    13.8     10.80   51.0
## 70    90  0.80400 -0.679         0.7810    13.6     10.50   50.3
## 71    92  0.80400 -0.682         0.7810    13.3     10.20   49.6
## 72    94  0.79600 -0.685         0.7750    13.1     10.00   48.9
## 73    96  0.80200 -0.690         0.7840    12.8      9.81   48.2
## 74    98  0.81200 -0.694         0.8020    12.6      9.63   47.5
## 75   100  0.81000 -0.699         0.8040    12.4      9.45   46.9
# Plot the results:
sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")

# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

2.a.2 One-step network construction and module detection

Constructing the gene network and identifying modules is now a simple function call:

##### ATTENTION: IF you get this error:
##### Error in (new("standardGeneric", .Data = function (x, y = NULL, use = "everything",  : 
#####  unused arguments (weights.x = NULL, weights.y = NULL, cosine = FALSE)
##### do the following: 
#in RStudio... from the "Session" pulldown, press restart R.
#library(WGCNA).
#Then run the blockwiseModules function again


library(WGCNA)
pw<-11

# net = blockwiseModules(datExpr0, 
#                        power = pw,
#                        TOMType = "unsigned", 
#                        minModuleSize = 30,
#                        reassignThreshold = 0,
#                        mergeCutHeight = 0.25,
#                        numericLabels = TRUE,
#                        pamRespectsDendro = FALSE,
#                        saveTOMs = TRUE,
#                        saveTOMFileBase = "TOM_0",
#                        verbose = 3)

# save(net,file="net")

load("net")

We have chosen the soft thresholding power 11 , a relatively large minimum module size of 30, and a medium sensitivity (deepSplit=2) to cluster splitting. The parameter mergeCutHeight is the threshold for merging of modules. We have also instructed the function to return numeric, rather than color, labels for modules, and to save the Topological Overlap Matrix. The output of the function may seem somewhat cryptic, but it is easy to use. For example, net\(colors contains the module assignment, and net\)MEs contains the module eigengenes of the modules.

# table(net$colors)

The dendrogram can be displayed together with the color assignment using the following code:

# open a graphics window
sizeGrWindow(12, 9)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)

We note that if the user would like to change some of the tree cut, module membership, and module merging criteria, the package provides the function recutBlockwiseTrees that can apply modified criteria without having to recompute the network and the clustering dendrogram. This may save a sub-stantial amount of time. We now save the module assignment and module eigengene information necessary for subsequent analysis.

moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree,
file = "FemaleLiver-02-networkConstruction-auto.RData")

3 Relating modules to external clinical trait

3a Quantifying module–trait associations

In this analysis we would like to identify modules that are significantly associated with the measured clinical traits. Since we already have a summary profile (eigengene) for each module, we simply correlate eigengenes with external traits and look for the most significant associations

# Define numbers of genes and samples
nGenes = ncol(datExpr0)
nSamples = nrow(datExpr0)
# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr0, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, as.numeric(datTraits$grup), use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)

Since we have a moderately large number of modules and traits, a suitable graphical representation will help in reading the table. We color code each association by the correlation value:

# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits)[4],
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))

The analysis identifies the 2 significant module–trait associations. We will concentrate on GRUP as the trait of interest.

3.b Gene relationship to trait and important modules: Gene Significance and Module Membership

We quantify associations of individual genes with our trait of interest by defining Gene Significance GS as (the absolute value of) the correlation between the gene and the trait. For each module, we also define a quantitative measure of module membership MM as the correlation of the module eigengene and the gene expression profile. This allows us to quantify the similarity of all proteins on the array to every module

# Define variable weight containing the weight column of datTrait
grup = as.data.frame(as.numeric(datTraits$grup));
names(grup) = "grup"
# names (colors) of the modules
modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(datExpr0, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));


names(geneModuleMembership) = paste("MM", modNames, sep="")
names(MMPvalue) = paste("p.MM", modNames, sep="")
geneTraitSignificance = as.data.frame(cor(datExpr0, grup, use = "p"))
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples))
names(geneTraitSignificance) = paste("GS.", names(grup), sep="")
names(GSPvalue) = paste("p.GS.", names(grup), sep="")

##3.c Intramodular analysis: identifying genes with high GS and MM

Using the GS and MM measures, we can identify genes that have a high significance for weight as well as high module membership in interesting modules. As an example, we look at the brown module that has the highest association with weight. We plot a scatterplot of Gene Significance vs. Module Membership in the significant modules

moduls_int<-gsub("ME","",rownames(moduleTraitPvalue)[moduleTraitPvalue<=0.05])
moduls_int_no_sig<-gsub("ME","",rownames(moduleTraitPvalue)[moduleTraitPvalue==max(moduleTraitPvalue)])
module = moduls_int[1]
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7)
par(mfrow = c(1,1))
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)

module = moduls_int[2]
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7)
par(mfrow = c(1,1))
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)

GS and MM are correlated, illustrating that proteins highly significantly associated with a trait are often also the most important (central) elements of modules associated with the trait. The reader is encouraged to try this code with other significance trait/module correlation.

3.d Functional enrichment

We have found modules with high association with our trait of interest, and have identified their central players by the Module Membership measure. We now merge this statistical information with protein annotation.

# colnames(datExpr0)
# colnames(datExpr0)[moduleColors=="green"]

ORA enrichment

gene<-colnames(datExpr0)[moduleColors==moduls_int[1]]
gene<-unlist(strsplit(gene,"\r\n"))

kegg<-enrichKEGG(gene,
                 organism = "hsa",
                 keyType = "uniprot",
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH",
                 qvalueCutoff = 0.2,
                 use_internal_data = FALSE)

assign(paste0("kegg_",moduls_int[1]),kegg)



go <- enrichGO(gene          = gene,
               
                OrgDb         = org.Hs.eg.db,
                ont           = "BP",keyType = "UNIPROT",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05,
        readable      = TRUE)

assign(paste0("go_",moduls_int[1]),go)







gene<-colnames(datExpr0)[moduleColors==moduls_int[2]]
gene<-unlist(strsplit(gene,"\r\n"))

kegg<-enrichKEGG(gene,
                 organism = "hsa",
                 keyType = "uniprot",
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH",
                 qvalueCutoff = 0.2,
                 use_internal_data = FALSE)

assign(paste0("kegg_",moduls_int[2]),kegg)



go <- enrichGO(gene          = gene,
               
                OrgDb         = org.Hs.eg.db,
                ont           = "BP",keyType = "UNIPROT",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05,
        readable      = TRUE)

assign(paste0("go_",moduls_int[2]),go)
datatable(get(paste0("kegg_",moduls_int[1]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[1],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))
datatable(get(paste0("go_",moduls_int[1]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[1],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))
datatable(get(paste0("kegg_",moduls_int[2]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[2],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))
datatable(get(paste0("go_",moduls_int[2]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[2],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))

5 Visualization of networks within R

5.a Visualizing the gene network

One way to visualize a weighted network is to plot its heatmap, Fig. 1. Each row and column of the heatmap correspond to a single gene. The heatmap can depict adjacencies or topological overlaps, with light colors denoting low adjacency (overlap) and darker colors higher adjacency (overlap). In addition, the gene dendrograms and module colors are plotted along the top and left side of the heatmap. The package provides a convenient function to create such network plots; Fig. 1 was created using the following code. This code can be executed only if the network was calculated using a single-block approach (that is, using the 1-step automatic or the step-by-step tutorials). If the networks were calculated using the block-wise approach, the user will need to modify this code to perform the visualization in each block separately. The modification is simple and we leave it as an exercise for the interested reader.

# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.
dissTOM = 1-TOMsimilarityFromExpr(datExpr0, power = 9);
## TOM calculation: adjacency..
## ..will not use multithreading.
##  Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
plotTOM = dissTOM^7;
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA;
# Call the plot function
sizeGrWindow(9,9)
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

5.bVisualizing the network of eigengenes

# Recalculate module eigengenes
MEs = moduleEigengenes(datExpr0, moduleColors)$eigengenes
# Isolate weight from the clinical traits
grup = as.data.frame(as.numeric(datTraits$grup ))
names(grup) = "grup"
# Add the weight to existing module eigengenes

MET = orderMEs(cbind(MEs, grup))
# Plot the relationships among the eigengenes and the trait
sizeGrWindow(5,7.5);
par(cex = 0.9)
plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
= 90)

The function produces a dendrogram of the eigengenes and trait(s), and a heatmap of their relationships. To split the dendrogram and heatmap plots, we can use the following code

# Plot the dendrogram

par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0),
plotHeatmaps = FALSE)

# Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2),
plotDendrograms = FALSE, xLabelsAngle = 90)

6 Exporting to Cytoscape

TOM <-1-dissTOM

modules = moduls_int

probes = colnames(datExpr0)
inModule = is.finite(match(moduleColors, modules));

modProbes = probes[inModule];
# modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)

dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
# cyt = exportNetworkToCytoscape(modTOM,
# edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
# nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
# weighted = TRUE,
# threshold = 0.02,
# nodeNames = modProbes,
# # altNodeNames = modGenes,
# nodeAttr = moduleColors[inModule])
adj <- modTOM
adj[adj > 0.5] = 1
adj[adj != 1] = 0
network <- graph.adjacency(adj)
network <- simplify(network)  # removes self-loops
results <- net


V(network)$color <- moduleColors[inModule]
V(network)$size<-6
par(mar=c(0,0,0,0))
# remove unconnected nodes
network <- delete.vertices(network, degree(network)==0)
V(network)$label.cex = 0.2
igraph::plot.igraph(network,
                    # mark.groups = T,
                    layout=layout.fruchterman.reingold(network), 
     edge.arrow.size = 0.1)

modules = moduls_int[1]

probes = colnames(datExpr0)
inModule = is.finite(match(moduleColors, modules));

modProbes = probes[inModule];
# modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)

dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read

adj <- modTOM
adj[adj > 0.3] = 1
adj[adj != 1] = 0
network <- graph.adjacency(adj)
network <- simplify(network)  # removes self-loops
results <- net


V(network)$color <- moduleColors[inModule]
V(network)$size<-6
V(network)$label.cex = 0.2
par(mar=c(0,0,0,0))
# remove unconnected nodes
network <- delete.vertices(network, degree(network)==0)
igraph::plot.igraph(network,
                    # mark.groups = T,
                    layout=layout.fruchterman.reingold(network), 
     edge.arrow.size = 0.1)

modules = moduls_int[2]

probes = colnames(datExpr0)
inModule = is.finite(match(moduleColors, modules));

modProbes = probes[inModule];
# modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)

dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read

adj <- modTOM
adj[adj > 0.3] = 1
adj[adj != 1] = 0
network <- graph.adjacency(adj)
network <- simplify(network)  # removes self-loops
results <- net

V(network)$label.cex = 0.2
V(network)$color <- moduleColors[inModule]
V(network)$size<-6
par(mar=c(0,0,0,0))
# remove unconnected nodes
network <- delete.vertices(network, degree(network)==0)
igraph::plot.igraph(network,
                    # mark.groups = T,
                    layout=layout.fruchterman.reingold(network), 
     edge.arrow.size = 0.1)

7. Fold change

Data from excel with all samples

data_AC$logFC<-
data_AC$`Abundance Ratio (log2): (Lisat Plaquetes) / (plasma_PRP)`
data_AC$adj.P.Val<-
  data_AC$`Abundance Ratio Adj. P-Value: (Lisat Plaquetes) / (plasma_PRP)`
EnhancedVolcano(data_AC,
                 lab = data_AC$Accession,
                # selectLab = genes_int,
                labCol = 'black',
    labFace = 'bold',
    
                x = "logFC",
                y = "adj.P.Val",
                pCutoff = 0.05,
                FCcutoff = 2,
                 ylim = c(0,11),
                # xlim = c(-3,3),    
                pointSize = 1,
                labSize = 2,
                colAlpha = 2,
                legendPosition = 'top',
                legendLabSize = 8,
                legendIconSize = 4.0,
                drawConnectors = T,
                widthConnectors = 0.75,
                boxedLabels = T,                
                title = "DE proteins ",
                subtitle = "Data from excel with samples A and C")

Data from two excels without common proteins without logaritmic before limma

Data from two excels (Calculated wiht non log)

groups<-as.factor(c(rep("ap",3),rep("PL",3)))


f = factor(groups)

design = model.matrix(~ 0 + f)
colnames(design) = gsub("f","",colnames(design))

head(datExpr0[,1:5])
##        A0A075B6I0 A0A075B6I9 A0A075B6K4 A0A075B6S5 A0A087WW87\r\nP01614
## aP-EV1    1329905        0.0    5170883        0.0              5707564
## aP-EV2   10606925        0.0   30584432        0.0             25283103
## aP-EV3   46057573        0.0   42908825        0.0             36806870
## PL-EV1    6298201 15581573.9          0   323688.0             17063633
## PL-EV2    5967341   997393.8          0   477809.4             16790989
## PL-EV3   50893998 50143614.2          0  1580733.5             25831799
data.fit = lmFit(t(datExpr0),design)



contrast.matrix = makeContrasts(ap-PL,levels=design)

data.fit.con = contrasts.fit(data.fit,contrast.matrix)
data.fit.eb = eBayes(data.fit.con)


table_DEG_all <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr")
table_DEG <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr",p.value = 0.05)




EnhancedVolcano(table_DEG_all,
                lab = rownames(table_DEG_all),
                # selectLab = genes_int,
                labCol = 'black',
                labFace = 'bold',
                x = "logFC",
                y = "adj.P.Val",
                pCutoff = 0.05,
                FCcutoff = 1,
                 ylim = c(0,6),
                 # xlim = c(-3,3),    
                pointSize = 1,
                labSize = 2,
                colAlpha = 2,
                legendPosition = 'top',
                legendLabSize = 8,
                legendIconSize = 4.0,
                drawConnectors = T,
                widthConnectors = 0.75,
                boxedLabels = T,
                title = "DE proteins ",
                subtitle = "Data calculated with limma (no log2)")  

Data from two excels (Calculated and log after limma)

table_DEG_all$logFC[sign(table_DEG_all$logFC)<0]<-
log2(1/abs(table_DEG_all$logFC[sign(table_DEG_all$logFC)<0]))
table_DEG_all$logFC[sign(table_DEG_all$logFC)>0]<-
log2(table_DEG_all$logFC[sign(table_DEG_all$logFC)>0])

  

EnhancedVolcano(table_DEG_all,
                lab = rownames(table_DEG_all),
                # selectLab = genes_int,
                labCol = 'black',
                labFace = 'bold',
    
                x = "logFC",
                y = "adj.P.Val",
                pCutoff = 0.05,
                FCcutoff = 1,
                ylim = c(0,5),
                 # xlim = c(-3,3),    
                pointSize = 1,
                labSize = 2,
                colAlpha = 2,
                legendPosition = 'top',
                legendLabSize = 8,
                legendIconSize = 4.0,
                drawConnectors = T,
                widthConnectors = 0.75,
                boxedLabels = T,
                title = "DE proteins ",
                subtitle = "Data calculated with limma (log2 after limma)")

Compare DEG between both methods

data_AC_deg<-
data_AC %>% 
  filter(adj.P.Val<=0.05) %>%
  dplyr::select(Accession)
table_DEG_all_deg<-
rownames(table_DEG_all %>% 
  filter(adj.P.Val<=0.05)  )
  



# List of items
x <- list(Precalculated = data_AC_deg$Accession, Limma = table_DEG_all_deg)

# 2D Venn diagram

ggVennDiagram(x,set_color ="red")+
scale_fill_gradient2()+
      scale_color_manual(values = c("black","black"))

Data from two excels with common proteins and log before limma

With proteins with 0 expression. 0 converted to 0.1

datExpr0_converted<-datExpr0
datExpr0_converted[datExpr0_converted==0]<-0.1
datExpr0_l<-log2(datExpr0_converted)


groups<-as.factor(c(rep("ap",3),rep("PL",3)))


f = factor(groups)

design = model.matrix(~ 0 + f)
colnames(design) = gsub("f","",colnames(design))


data.fit = lmFit(t(datExpr0_l),design)



contrast.matrix = makeContrasts(ap-PL,levels=design)

data.fit.con = contrasts.fit(data.fit,contrast.matrix)
data.fit.eb = eBayes(data.fit.con)


table_DEG_all <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr")
table_DEG_with_0 <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr",p.value = 0.05)

table_DEG_all<-
table_DEG_all %>% 
  mutate(celltype1=ifelse(logFC< (-30),"Negatius",ifelse(logFC>30,"Positius","NO")))

celltype1<-rownames(table_DEG_all)[table_DEG_all$celltype1=="Negatius"]
celltype2<-rownames(table_DEG_all)[table_DEG_all$celltype1=="Positius"]
EnhancedVolcano(table_DEG_all,
                # encircle =  c(celltype1),encircleAlpha = 0.5,
                # shade = celltype2,
                #   shadeBins = 10,
                # shadeSize=1000,
                
                
                      
                lab = rownames(table_DEG_all),
                # selectLab = genes_int,
                labCol = 'black',
    labFace = 'bold',
    
                x = "logFC",
                y = "adj.P.Val",
                pCutoff = 0.05,
                FCcutoff = 1,
                 # ylim = c(0,7),
                 # xlim = c(-3,3),    
                pointSize = 1,
                labSize = 2,
                colAlpha = 2,
                legendPosition = 'top',
                legendLabSize = 8,
                legendIconSize = 4.0,
                drawConnectors = T,
                widthConnectors = 0.75,
                boxedLabels = T,                
                title = "DE proteins",
                subtitle = "data log2 wiht 0")  

Complete cases (without proteins with 0 expression)

data_abundance_complete<-data_abundance[complete.cases(data_abundance),]

datExprcomplete = as.data.frame(data_abundance_complete)
rownames(datExprcomplete) = datExprcomplete$Row.names
datExprcomplete<-datExprcomplete[,-1]
datExprcomplete<-t(datExprcomplete)
datExpr0_converted<-datExprcomplete

datExpr0_l<-log2(datExpr0_converted)


groups<-as.factor(c(rep("ap",3),rep("PL",3)))


f = factor(groups)

design = model.matrix(~ 0 + f)
colnames(design) = gsub("f","",colnames(design))


data.fit = lmFit(t(datExpr0_l),design)



contrast.matrix = makeContrasts(ap-PL,levels=design)
colnames(contrast.matrix)
## [1] "ap - PL"
data.fit.con = contrasts.fit(data.fit,contrast.matrix)
data.fit.eb = eBayes(data.fit.con)


table_DEG_all <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr")
table_DEG_complete <- topTable(data.fit.eb, number = Inf,adjust.method = "fdr",p.value = 0.05)

EnhancedVolcano(table_DEG_all,
                lab = rownames(table_DEG_all),
                # selectLab = genes_int,
                labCol = 'black',
    labFace = 'bold',
    
                x = "logFC",
                y = "adj.P.Val",
                pCutoff = 0.05,
                FCcutoff = 1,
                  ylim = c(0,6),
                 # xlim = c(-3,3),    
                pointSize = 1,
                labSize = 7,
                colAlpha = 2,
                legendPosition = 'top',
                legendLabSize = 8,
                legendIconSize = 4.0,
                drawConnectors = T,
                widthConnectors = 0.75,
                boxedLabels = T,                
                title = "DE proteins",
                subtitle = "data two excels  wihtout 0")  

Compare DEG between both methods

data_AC_deg<-
data_AC %>% 
  filter(adj.P.Val<=0.05) %>%
  dplyr::select(Accession)
table_DEG_all_deg<-
rownames(table_DEG_all %>% 
  filter(adj.P.Val<=0.05)  )
  



# List of items
x <- list(Complete_cases = rownames(table_DEG_complete), With_0 = rownames(table_DEG_with_0))

# 2D Venn diagram

ggVennDiagram(x,set_color ="red")+
# scale_fill_gradient2()+
      scale_color_manual(values = c("black","black"))

x <- list(Complete_cases = rownames(table_DEG_complete), With_0 = rownames(table_DEG_with_0),Precalculated = data_AC_deg$Accession, Limma = table_DEG_all_deg)

ggVennDiagram(x,set_color ="red")+
 scale_fill_gradient2()

8. Enrichment

ORA

data_AC_sig<-
data_AC %>% 
  filter(adj.P.Val<=0.05) 

gene<-data_AC_sig$logFC

names(gene)<-data_AC_sig$Accession

kegg<-enrichKEGG(names(gene),
                 organism = "hsa",
                 keyType = "uniprot",
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH",
                 qvalueCutoff = 0.2,
                 use_internal_data = FALSE)


kegg_sig<-
kegg@result %>%
  filter(p.adjust<=0.05)
datatable_jm(kegg_sig,"geneID")
go <- enrichGO(gene          = names(gene),
               
               OrgDb         = org.Hs.eg.db,
               ont           = "BP",
               keyType = "UNIPROT",
               pAdjustMethod = "BH",
               pvalueCutoff  = 0.01,
               qvalueCutoff  = 0.05,
        readable      = TRUE)


go_sig<-
go@result %>%
  filter(p.adjust<=0.05)

datatable_jm(go_sig,"geneID")

GSEA

gene<-data_AC$logFC

names(gene)<-data_AC$Accession
gene_list<-sort(gene,decreasing = T)
set.seed(123)
go_gsea <- gseGO(geneList=gene_list, 
             ont ="BP", 
             keyType = "UNIPROT", 
             
             pvalueCutoff = 0.05, 
             verbose = TRUE, 
             OrgDb = org.Hs.eg.db, 
             pAdjustMethod = "BH")

go_gsea_sig<- go_gsea@result  %>% 
  filter(p.adjust<0.05)
datatable_jm(go_gsea_sig,"core_enrichment")
kegg_gsea <- gseKEGG(geneList     = gene_list,
               organism     = "hsa",
               
               pvalueCutoff = 0.05,
               pAdjustMethod = "BH",
               keyType       = "uniprot")


kegg_gsea_sig<- kegg_gsea@result  %>% 
  filter(p.adjust<0.05)
datatable_jm(kegg_gsea_sig,"core_enrichment")